##################################################################
# JLC descriptive analyses
##################################################################
#set working directory to location of replication archive
setwd("")
#clear working environment
rm(list=ls())
#load packages
library(tidyverse) 
library(stargazer)
library(urbnmapr)
library(readxl)

##################################################################
# Import LEDB data --------------
##################################################################  
ledb <- read.csv("./ledb_candidatelevel.csv")

table(ledb$office_consolidated)

ledb_prosecutors <- ledb %>% filter(office_consolidated=="Prosecutor") %>%
  arrange(state_abb,geo_name,year) %>%
  mutate(female=ifelse(gender_est=="F",1,0),
         nonwhite=ifelse(race_est!="caucasian",1,0),
         democrat=ifelse(pid_est=="D",1,0),
         vote_share=vote_share*100,
         winner=ifelse(winner=="win",1,0))

length(unique(ledb_prosecutors$ledb_candid)) #almost 4000 candidates, >6300 total obs

ledb_table <- ledb_prosecutors %>% select(vote_share,winner,incumbent,democrat,nonwhite,female)

stargazer(ledb_table,title="Prosecutors in American Local Elections Database, 1989-2021",
          covariate.labels = c("Vote Share","Winner","Incumbent",
                               "Democrat","Nonwhite","Female"),
          out="./tables/ledbdescription.tex",label="ledbdescription",
          notes="All cities and counties with more than 50,000 population (see de Benedictis-Kessner et al. 2023)")

#write.csv(ledb_prosecutors,"./ledb_prosecutors.csv",row.names=F,na="")

#https://ballotpedia.org/United_States_municipal_elections,_2024

#visualize percentage won, if there was challenger, margin of victory

##################################################################
# Import UNC data --------------
##################################################################  
unc_1217 <- read.csv("./Public Dataset (Updated).csv")

unc_1819 <- read.csv("./election_results.csv")

unc_all <- full_join(unc_1217,unc_1819) %>%
  mutate(incumbent=ifelse(incum_chall=="I",1,0),
         democrat=ifelse(party=="DEM",1,0),
         female=ifelse(cand_gender=="F",1,0),
         vote_percent_primary=ifelse(vote_percent_primary==1000,100,vote_percent_primary),
         term=election_year-incumbent_year,
         term=ifelse(term<0,0,term),
         winner_primary_int = ifelse(winner_primary=="W",1,0),
         winner_general_int = ifelse(winner_general=="W",1,0),
         district=str_trim(district))

summary(unc_all$vote_percent_primary)
summary(unc_all$vote_percent_general)

table(unc_all$incumbent_year)

table(unc_all$election_year,unc_all$incum_chall)

unc_table <- unc_all %>% select(vote_percent_primary,vote_percent_general,
                                winner_primary_int,winner_general_int,incumbent,term,democrat,female)

stargazer(unc_table,title="Prosecutors in Politics and Prosecutors Project Database, 2012-2019",
          covariate.labels = c("Vote Share (Primary)","Vote Share (General)",
                               "Winner (Primary)","Winner (General)",
                               "Incumbent","Years in Office",
                               "Democrat","Female"),
          out="./tables/uncdatadescription.tex",label="uncdatadescription",
          notes="All prosecutorial elections, 2012-2019 (see Hessick and Morse 2020)")

#Map
counties <- get_urbn_map(map = "counties")

ledb_avg <- ledb_prosecutors %>% 
  filter(state_abb!="AK") %>% filter(state_abb!="HI") %>%
  filter(winner==1) %>% filter(vote_share!=0) %>%
  filter(vote_share>49) %>% #this eliminates 10 cands appointed interim and those in runoffs
  group_by(fips) %>% dplyr::summarise(`Vote Share`=mean(vote_share)) %>%
  left_join(.,counties, by = c("fips"="county_fips"))

ledb_avg %>%
  ggplot(aes(long, lat, group = group, fill = `Vote Share`)) +
  geom_polygon(color = NA) +
  scale_fill_gradient(guide = guide_colorbar(title.position = "top"),
                      low = "black", high = "lightgray") +
  geom_polygon(data = states %>% filter(state_name!="Alaska") %>%
                                          filter(state_name!="Hawaii"), mapping = aes(long, lat, group = group),
               fill = NA, color = "gray") +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  theme(axis.line=element_blank(),axis.text.x=element_blank(),
                                             axis.text.y=element_blank(),axis.ticks=element_blank(),
                                             axis.title.x=element_blank(),
                                             axis.title.y=element_blank(),
                                             panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
                                             panel.grid.minor=element_blank(),plot.background=element_blank()) +
  ggtitle("Average Vote Share of Winning Prosecutors By Counties \n Above 50,000 Population, 1989-2021 (from ALED)")
ggsave("./figures/aledmap.pdf",height=8,width=10)

#map from unc: need to map to counties
#inconsistencies from one unc dataset to another: Kenedy-Kleberg, mills, fremont,Golden Valley, Musselshell
fips_counties <- counties %>%
  mutate(county_name=str_replace(county_name, " County", ""),
         county_name=str_replace(county_name, " Parish", ""),
         county_name=str_trim(tolower(county_name)))

unccounties <- read_xlsx("./UNC PPP - prosecutor political unit data.xlsx") %>%
  filter(Election_year<2020) %>%
  separate(`County/counties`,c("county1","county2","county3","county4","county5","county6","county7","county8"),
           remove=F,, sep = ",") %>% 
  select(-selection_mode,-`City name (for city-based election outliers only)`,-AMG_notes) %>% 
  pivot_longer(!c(`State name`,`District name/number`,`County/counties`,Election_year),
                                                 names_to="countynum",values_to="county_name") %>%
  filter(!is.na(county_name)) %>% mutate(county_name=str_trim(tolower(county_name))) %>%
  distinct() %>% full_join(.,unc_all,by=c("State name"="state","District name/number"="district"))

x <- unccounties %>% filter(is.na(cand_fname))

unc_avg <- unccounties %>% 
  filter("State name"!="Alaska") %>% filter("State name"!="Hawaii") %>%
  filter("State name"!="Connecticut") %>% filter("State name"!="Delaware") %>%
  filter("State name"!="New Jersey") %>%
  filter(winner_general_int==1) %>%
  mutate(vote_percent_general=ifelse(is.na(vote_percent_general),100,vote_percent_general)) %>%
  group_by(`District name/number`,county_name) %>% dplyr::summarise(`Vote Share`=mean(vote_percent_general)) %>%
  left_join(.,fips_counties,by=c("county_name"="county_name")) %>% filter(state_abbv!="HI") %>%
  filter(!is.na(`Vote Share`))

unc_avg %>%
  ggplot(aes(long, lat, group = group, fill = `Vote Share`)) +
  geom_polygon(color = NA) +
  scale_fill_gradient(guide = guide_colorbar(title.position = "top"),
                      low = "black", high = "lightgray") +
  geom_polygon(data = states %>% filter(state_name!="Alaska") %>%
                 filter(state_name!="Hawaii"), mapping = aes(long, lat, group = group),
               fill = NA, color = "gray") +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  theme(axis.line=element_blank(),axis.text.x=element_blank(),
        axis.text.y=element_blank(),axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank()) +
  ggtitle("Average Vote Share of Winning Prosecutors in General Elections By County, 2012-2019 (from UNC)")
ggsave("./figures/uncmap.pdf",height=8,width=10)
